library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Location of the ODI career batting-and-fielding CSV export.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists at exactly this location.
odi_csv_path <- "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Statistics\\odicareerbattingandfielding.csv"
odiBattingFielding <- read.csv(file = odi_csv_path)

# Restrict the working data frame to the first eleven columns and preview
# its first six rows.
df <- odiBattingFielding[, seq_len(11)]
head(df, n = 6L)
##                           Name Matches Innings Not_Outs Runs High_Score
## 1            Aaron James Finch      59      57        1 2169        148
## 2       Aavishkar Madhav Salvi       4       3        1    4          4
## 3             Abhimanyu Mithun       5       3        0   51         24
## 4         Abhishek Mohan Nayar       3       1        1    0          0
## 5 Abraham Benjamin de Villiers     197     189       34 8524        162
## 6           Adam Charles Voges      31      28        9  870        112
##   Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken
## 1   38.73         7       11       86.96            30
## 2    2.00         0        0       28.57             2
## 3   17.00         0        0       92.72             1
## 4      NA         0        0        0.00             0
## 5   54.99        24       47       99.96           161
## 6   45.78         1        4       87.17             7
# Discard every row that has a missing value in any column, then preview the
# first six remaining rows (original row names are kept, so gaps are expected).
df <- stats::na.omit(object = df)
head(df, n = 6L)
##                           Name Matches Innings Not_Outs Runs High_Score
## 1            Aaron James Finch      59      57        1 2169        148
## 2       Aavishkar Madhav Salvi       4       3        1    4          4
## 3             Abhimanyu Mithun       5       3        0   51         24
## 5 Abraham Benjamin de Villiers     197     189       34 8524        162
## 6           Adam Charles Voges      31      28        9  870        112
## 7         Adam Craig Gilchrist     286     278       11 9595        172
##   Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken
## 1   38.73         7       11       86.96            30
## 2    2.00         0        0       28.57             2
## 3   17.00         0        0       92.72             1
## 5   54.99        24       47       99.96           161
## 6   45.78         1        4       87.17             7
## 7   35.93        16       55       96.89           416
# Per-column descriptive summary of the cleaned data frame.
summary(object = df)
##                            Name        Matches          Innings      
##  Aaron James Finch           :  1   Min.   :  1.00   Min.   :  1.00  
##  Aavishkar Madhav Salvi      :  1   1st Qu.: 25.00   1st Qu.: 14.00  
##  Abhimanyu Mithun            :  1   Median : 68.00   Median : 42.00  
##  Abraham Benjamin de Villiers:  1   Mean   : 99.22   Mean   : 78.05  
##  Adam Charles Voges          :  1   3rd Qu.:153.00   3rd Qu.:114.00  
##  Adam Craig Gilchrist        :  1   Max.   :463.00   Max.   :452.00  
##  (Other)                     :245                                    
##     Not_Outs          Runs         High_Score        Average     
##  Min.   : 0.00   Min.   :    0   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 3.00   1st Qu.:  123   1st Qu.: 30.50   1st Qu.:11.82  
##  Median : 9.00   Median :  737   Median : 72.00   Median :23.39  
##  Mean   :14.26   Mean   : 2065   Mean   : 79.96   Mean   :23.94  
##  3rd Qu.:21.50   3rd Qu.: 2546   3rd Qu.:123.00   3rd Qu.:34.30  
##  Max.   :72.00   Max.   :18426   Max.   :264.00   Max.   :90.50  
##                                                                  
##    No_Of_100        No_Of_50      Strike_Rate     Catches_Taken  
##  Min.   : 0.00   Min.   : 0.00   Min.   :  0.00   Min.   :  0.0  
##  1st Qu.: 0.00   1st Qu.: 0.00   1st Qu.: 71.22   1st Qu.:  7.0  
##  Median : 0.00   Median : 2.00   Median : 80.64   Median : 19.0  
##  Mean   : 2.96   Mean   :11.33   Mean   : 79.30   Mean   : 41.1  
##  3rd Qu.: 3.00   3rd Qu.:14.00   3rd Qu.: 89.70   3rd Qu.: 49.5  
##  Max.   :49.00   Max.   :96.00   Max.   :157.89   Max.   :416.0  
## 
# Fix the RNG state so the random k-means starting centres are reproducible.
set.seed(20)

# Feature sets for the two clusterings. Columns are selected by name rather
# than by position (previously 3, 10 and 3, 4, 8) so that a change in column
# order cannot silently swap the features being clustered.
df1 <- df %>%
  select(Innings, Strike_Rate)

df2 <- df %>%
  select(Innings, Not_Outs, No_Of_100)

# Partition the players into 5 clusters on innings played and strike rate.
OBFCluster <- kmeans(df1, 5)

# Keep the cluster ids as a factor so they map onto a discrete colour scale.
OBFCluster$cluster <- as.factor(OBFCluster$cluster)

# Dedicated plotting frame: the features plus player name and cluster id, so
# the plots reference columns of their own data instead of reaching into
# other objects with `$` inside aes() / plot_ly().
cluster_df1 <- data.frame(
  df1,
  Name = df$Name,
  Cluster = OBFCluster$cluster
)

# NOTE(review): the legend labels below are attached to cluster ids 1..5 in
# order. k-means numbers its clusters arbitrarily, so this mapping only holds
# for the exact seed / RNG behaviour it was derived with -- re-check the
# labels against OBFCluster$centers whenever the data or R version changes.
ggplot(cluster_df1, aes(Innings, Strike_Rate, color = Cluster)) +
  geom_point(size = 2) +
  scale_color_hue(
    labels = c(
      "Best players", "Bad Players", "Good players", "Useless",
      "Worst Players"
    )
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # Title now names the variable actually on the x axis (Innings, not Matches).
  ggtitle("ODI Career Innings vs Strike Rate")

# Interactive version of the same scatter; hovering a point shows the player.
p <- plot_ly(
  cluster_df1,
  x = ~Innings, y = ~Strike_Rate,
  type = "scatter", mode = "markers",
  color = ~Cluster,
  text = ~paste("Name:", Name)
) %>%
  layout(title = "Clusters of Innings vs Strike Rate (batsmen)")

p
# Partition the players into 5 clusters using every column of df2.
# No seed is set here: the result depends on the RNG state left by the code
# that ran before this point.
OBFCluster2 <- kmeans(df2, 5)

# Keep the cluster ids as a factor so they map onto a discrete colour scale.
OBFCluster2$cluster <- as.factor(OBFCluster2$cluster)

# Dedicated plotting frame shared by both plots (previously one plot used df
# and the other df2, each reaching into other objects with `$`).
cluster_df2 <- data.frame(
  df2,
  Name = df$Name,
  Cluster = OBFCluster2$cluster
)

# NOTE(review): the original titles spoke of "notouts and centuries", yet both
# plots put Innings on the x axis. The axes are kept and the titles corrected
# to describe what is drawn; if Not_Outs was the intended x variable, change
# the aesthetic/formula below instead -- confirm with the author.
ggplot(cluster_df2, aes(Innings, No_Of_100, color = Cluster)) +
  geom_point(size = 2) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("ODI Career Innings vs Centuries")

# Interactive version of the same scatter; hovering a point shows the player.
q <- plot_ly(
  cluster_df2,
  x = ~Innings, y = ~No_Of_100,
  type = "scatter", mode = "markers",
  color = ~Cluster,
  text = ~paste("Name:", Name)
) %>%
  layout(title = "Clusters of Innings vs Centuries (batsmen)")
q